flowchart TD
subgraph 01_load
    %% Stage 01: each source object is written out via write_tsv to a *.tsv.gz file.
    A["ncbi_data"] -->|"write_tsv"| B["01_ncbi_data.tsv.gz"]
    I["annotation_data$probes"] -->|"write_tsv"| J["01_ncbi_annot_probes.tsv.gz"]
end
subgraph 02_clean
    %% Stage 02: read the stage-01 files back in and derive the phenotype
    %% table and the annotated expression table.
    B["01_ncbi_data.tsv.gz"] -->|"read_tsv"| C["ncbi_data"]

    %% Phenotype branch: ncbi_data -> ncbi_dirty_pheno (three successive states) -> ncbi_clean_pheno.
    C -->|"as_tibble |> <br/>filter(!str_detect) |> <br/>slice(41:n) |> <br/>t()"| D["ncbi_dirty_pheno"]
    D -->|"colnames[1, ] |> <br/>str_replace_all('!sample_', ' ')"| E["ncbi_dirty_pheno"]
    E -->|"[-1, ] |> <br/>as_tibble() |> <br/>rename_with()"| F["ncbi_dirty_pheno"]
    F -->|"select() |> <br/>mutate() |> <br/>select() |> <br/>t() |> <br/>as.data.frame() |> <br/>rename_with() |> <br/>as_tibble()"| H["ncbi_clean_pheno"]

    %% Expression branch: ncbi_data -> ncbi_dirty_expr (three successive states).
    C --> G["ncbi_dirty_expr"]
    G -->|"slice() |> <br/>colnames() |> <br/>slice() |> <br/>unlist()"| L["ncbi_dirty_expr"]
    %% NOTE(review): this label excludes 'probe.id' while the join label below uses 'probe_id';
    %% confirm which column name the script actually uses.
    L -->|"rename() |> <br/>mutate(across(-probe.id, ~ as.numeric(.x)))"| M["ncbi_dirty_expr"]

    %% Annotation branch: probe annotation file -> gene_annotation.
    J["01_ncbi_annot_probes.tsv.gz"] -->|"read_tsv()"| N["ncbi_annot_probes"]
    N -->|"select()"| O["gene_annotation"]

    %% P is a blank junction node: both join inputs meet here and a single
    %% arrow continues to the joined result.
    O ---|"select()"| P( ):::empty
    M --- P
    P -->|"right_join(by = c('Probe_Id' = 'probe_id')) |> <br/>rename() |> <br/>select()"| Q["ncbi_clean_expression"]

    %% Style for the junction node; without this definition the ':::empty' class has no effect.
    classDef empty width:0px,height:0px
end
%% Group 9 presentation